/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    RandomTree.java
 *    Copyright (C) 2001-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.classifiers.trees;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.Serializable;
import java.util.Enumeration;
import java.util.LinkedList;
import java.util.Queue;
import java.util.Random;
import java.util.Vector;

import utilities.Util;
import weka.classifiers.AbstractClassifier;
import weka.classifiers.Classifier;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.ContingencyTables;
import weka.core.Drawable;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.PartitionGenerator;
import weka.core.Randomizable;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;

/**
 * <!-- globalinfo-start --> Class for constructing a tree that considers K
 * randomly chosen attributes at each node. Performs no pruning. Also has an
 * option to allow estimation of class probabilities based on a hold-out set
 * (backfitting).
 * <p/>
 * <!-- globalinfo-end -->
 * 
 * <!-- options-start --> Valid options are:
 * <p/>
 * 
 * <pre>
 * -K &lt;number of attributes&gt;
 *  Number of attributes to randomly investigate
 *  (&lt;0 = int(log_2(#attributes)+1)).
 * </pre>
 * 
 * <pre>
 * -M &lt;minimum number of instances&gt;
 *  Set minimum number of instances per leaf.
 * </pre>
 * 
 * <pre>
 * -S &lt;num&gt;
 *  Seed for random number generator.
 *  (default 1)
 * </pre>
 * 
 * <pre>
 * -depth &lt;num&gt;
 *  The maximum depth of the tree, 0 for unlimited.
 *  (default 0)
 * </pre>
 * 
 * <pre>
 * -N &lt;num&gt;
 *  Number of folds for backfitting (default 0, no backfitting).
 * </pre>
 * 
 * <pre>
 * -U
 *  Allow unclassified instances.
 * </pre>
 * 
 * <pre>
 * -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console
 * </pre>
 * 
 * <!-- options-end -->
 * 
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
 * @version $Revision: 9526 $
 */
public class RandomTree extends AbstractClassifier
		implements OptionHandler, WeightedInstancesHandler, Randomizable, Drawable, PartitionGenerator {

	/** for serialization */
	static final long serialVersionUID = 8934314652175299374L;

	/** The Tree object */
	protected Tree m_Tree = null;

	/** The header information. */
	protected Instances m_Info = null;

	/** Minimum number of instances for leaf. */
	protected double m_MinNum = 1.0;

	/** The number of attributes considered for a split. */
	protected int m_KValue = 0;

	/** The random seed to use. */
	protected int m_randomSeed = 1;

	/** The maximum depth of the tree (0 = unlimited) */
	protected int m_MaxDepth = 0;

	/** Determines how much data is used for backfitting */
	protected int m_NumFolds = 0;

	/** Whether unclassified instances are allowed */
	protected boolean m_AllowUnclassifiedInstances = false;

	/** a ZeroR model in case no model can be built from the data */
	protected Classifier m_zeroR;

	/**
	 * Returns a string describing classifier
	 * 
	 * @return a description suitable for displaying in the
	 *         explorer/experimenter gui
	 */
	public String globalInfo() {

		return "Class for constructing a tree that considers K randomly "
				+ " chosen attributes at each node. Performs no pruning. Also has"
				+ " an option to allow estimation of class probabilities based on" + " a hold-out set (backfitting).";
	}

	/**
	 * Returns the tip text for this property
	 * 
	 * @return tip text for this property suitable for displaying in the
	 *         explorer/experimenter gui
	 */
	public String minNumTipText() {
		return "The minimum total weight of the instances in a leaf.";
	}

	/**
	 * Get the value of MinNum.
	 * 
	 * @return Value of MinNum.
	 */
	public double getMinNum() {

		return m_MinNum;
	}

	/**
	 * Set the value of MinNum.
	 * 
	 * @param newMinNum
	 *            Value to assign to MinNum.
	 */
	public void setMinNum(double newMinNum) {

		m_MinNum = newMinNum;
	}

	/**
	 * Returns the tip text for this property
	 * 
	 * @return tip text for this property suitable for displaying in the
	 *         explorer/experimenter gui
	 */
	public String KValueTipText() {
		return "Sets the number of randomly chosen attributes. If 0, log_2(number_of_attributes) + 1 is used.";
	}

	/**
	 * Get the value of K.
	 * 
	 * @return Value of K.
	 */
	public int getKValue() {

		return m_KValue;
	}

	/**
	 * Set the value of K.
	 * 
	 * @param k
	 *            Value to assign to K.
	 */
	public void setKValue(int k) {

		m_KValue = k;
	}

	/**
	 * Returns the tip text for this property
	 * 
	 * @return tip text for this property suitable for displaying in the
	 *         explorer/experimenter gui
	 */
	public String seedTipText() {
		return "The random number seed used for selecting attributes.";
	}

	/**
	 * Set the seed for random number generation.
	 * 
	 * @param seed
	 *            the seed
	 */
	@Override
	public void setSeed(int seed) {

		m_randomSeed = seed;
	}

	/**
	 * Gets the seed for the random number generations
	 * 
	 * @return the seed for the random number generation
	 */
	@Override
	public int getSeed() {

		return m_randomSeed;
	}

	/**
	 * Returns the tip text for this property
	 * 
	 * @return tip text for this property suitable for displaying in the
	 *         explorer/experimenter gui
	 */
	public String maxDepthTipText() {
		return "The maximum depth of the tree, 0 for unlimited.";
	}

	/**
	 * Get the maximum depth of trh tree, 0 for unlimited.
	 * 
	 * @return the maximum depth.
	 */
	public int getMaxDepth() {
		return m_MaxDepth;
	}

	/**
	 * Returns the tip text for this property
	 * 
	 * @return tip text for this property suitable for displaying in the
	 *         explorer/experimenter gui
	 */
	public String numFoldsTipText() {
		return "Determines the amount of data used for backfitting. One fold is used for "
				+ "backfitting, the rest for growing the tree. (Default: 0, no backfitting)";
	}

	/**
	 * Get the value of NumFolds.
	 * 
	 * @return Value of NumFolds.
	 */
	public int getNumFolds() {

		return m_NumFolds;
	}

	/**
	 * Set the value of NumFolds.
	 * 
	 * @param newNumFolds
	 *            Value to assign to NumFolds.
	 */
	public void setNumFolds(int newNumFolds) {

		m_NumFolds = newNumFolds;
	}

	/**
	 * Returns the tip text for this property
	 * 
	 * @return tip text for this property suitable for displaying in the
	 *         explorer/experimenter gui
	 */
	public String allowUnclassifiedInstancesTipText() {
		return "Whether to allow unclassified instances.";
	}

	/**
	 * Get the value of NumFolds.
	 * 
	 * @return Value of NumFolds.
	 */
	public boolean getAllowUnclassifiedInstances() {

		return m_AllowUnclassifiedInstances;
	}

	/**
	 * Set the value of AllowUnclassifiedInstances.
	 * 
	 * @param newAllowUnclassifiedInstances
	 *            Value to assign to AllowUnclassifiedInstances.
	 */
	public void setAllowUnclassifiedInstances(boolean newAllowUnclassifiedInstances) {

		m_AllowUnclassifiedInstances = newAllowUnclassifiedInstances;
	}

	/**
	 * Set the maximum depth of the tree, 0 for unlimited.
	 * 
	 * @param value
	 *            the maximum depth.
	 */
	public void setMaxDepth(int value) {
		m_MaxDepth = value;
	}

	/**
	 * Lists the command-line options for this classifier.
	 * 
	 * @return an enumeration over all possible options
	 */
	@Override
	public Enumeration listOptions() {

		Vector newVector = new Vector();

		newVector.addElement(
				new Option("\tNumber of attributes to randomly investigate\n" + "\t(<0 = int(log_2(#attributes)+1)).",
						"K", 1, "-K <number of attributes>"));

		newVector.addElement(
				new Option("\tSet minimum number of instances per leaf.", "M", 1, "-M <minimum number of instances>"));

		newVector.addElement(new Option("\tSeed for random number generator.\n" + "\t(default 1)", "S", 1, "-S <num>"));

		newVector.addElement(new Option("\tThe maximum depth of the tree, 0 for unlimited.\n" + "\t(default 0)",
				"depth", 1, "-depth <num>"));

		newVector.addElement(
				new Option("\tNumber of folds for backfitting " + "(default 0, no backfitting).", "N", 1, "-N <num>"));
		newVector.addElement(new Option("\tAllow unclassified instances.", "U", 0, "-U"));

		Enumeration enu = super.listOptions();
		while (enu.hasMoreElements()) {
			newVector.addElement(enu.nextElement());
		}

		return newVector.elements();
	}

	/**
	 * Gets options from this classifier.
	 * 
	 * @return the options for the current setup
	 */
	@Override
	public String[] getOptions() {
		Vector result;
		String[] options;
		int i;

		result = new Vector();

		result.add("-K");
		result.add("" + getKValue());

		result.add("-M");
		result.add("" + getMinNum());

		result.add("-S");
		result.add("" + getSeed());

		if (getMaxDepth() > 0) {
			result.add("-depth");
			result.add("" + getMaxDepth());
		}

		if (getNumFolds() > 0) {
			result.add("-N");
			result.add("" + getNumFolds());
		}

		if (getAllowUnclassifiedInstances()) {
			result.add("-U");
		}

		options = super.getOptions();
		for (i = 0; i < options.length; i++)
			result.add(options[i]);

		return (String[]) result.toArray(new String[result.size()]);
	}

	/**
	 * Parses a given list of options.
	 * <p/>
	 * 
	 * <!-- options-start --> Valid options are:
	 * <p/>
	 * 
	 * <pre>
	 * -K &lt;number of attributes&gt;
	 *  Number of attributes to randomly investigate
	 *  (&lt;0 = int(log_2(#attributes)+1)).
	 * </pre>
	 * 
	 * <pre>
	 * -M &lt;minimum number of instances&gt;
	 *  Set minimum number of instances per leaf.
	 * </pre>
	 * 
	 * <pre>
	 * -S &lt;num&gt;
	 *  Seed for random number generator.
	 *  (default 1)
	 * </pre>
	 * 
	 * <pre>
	 * -depth &lt;num&gt;
	 *  The maximum depth of the tree, 0 for unlimited.
	 *  (default 0)
	 * </pre>
	 * 
	 * <pre>
	 * -N &lt;num&gt;
	 *  Number of folds for backfitting (default 0, no backfitting).
	 * </pre>
	 * 
	 * <pre>
	 * -U
	 *  Allow unclassified instances.
	 * </pre>
	 * 
	 * <pre>
	 * -D
	 *  If set, classifier is run in debug mode and
	 *  may output additional info to the console
	 * </pre>
	 * 
	 * <!-- options-end -->
	 * 
	 * @param options
	 *            the list of options as an array of strings
	 * @throws Exception
	 *             if an option is not supported
	 */
	@Override
	public void setOptions(String[] options) throws Exception {
		String tmpStr;

		tmpStr = Utils.getOption('K', options);
		if (tmpStr.length() != 0) {
			m_KValue = Integer.parseInt(tmpStr);
		} else {
			m_KValue = 0;
		}

		tmpStr = Utils.getOption('M', options);
		if (tmpStr.length() != 0) {
			m_MinNum = Double.parseDouble(tmpStr);
		} else {
			m_MinNum = 1;
		}

		tmpStr = Utils.getOption('S', options);
		if (tmpStr.length() != 0) {
			setSeed(Integer.parseInt(tmpStr));
		} else {
			setSeed(1);
		}

		tmpStr = Utils.getOption("depth", options);
		if (tmpStr.length() != 0) {
			setMaxDepth(Integer.parseInt(tmpStr));
		} else {
			setMaxDepth(0);
		}
		String numFoldsString = Utils.getOption('N', options);
		if (numFoldsString.length() != 0) {
			m_NumFolds = Integer.parseInt(numFoldsString);
		} else {
			m_NumFolds = 0;
		}

		setAllowUnclassifiedInstances(Utils.getFlag('U', options));

		super.setOptions(options);

		Utils.checkForRemainingOptions(options);
	}

	/**
	 * Returns default capabilities of the classifier.
	 * 
	 * @return the capabilities of this classifier
	 */
	@Override
	public Capabilities getCapabilities() {
		Capabilities result = super.getCapabilities();
		result.disableAll();

		// attributes
		result.enable(Capability.NOMINAL_ATTRIBUTES);
		result.enable(Capability.NUMERIC_ATTRIBUTES);
		result.enable(Capability.DATE_ATTRIBUTES);
		result.enable(Capability.MISSING_VALUES);

		// class
		result.enable(Capability.NOMINAL_CLASS);
		result.enable(Capability.MISSING_CLASS_VALUES);

		return result;
	}

	/**
	 * Builds classifier.
	 * 
	 * @param data
	 *            the data to train with
	 * @throws Exception
	 *             if something goes wrong or the data doesn't fit
	 */
	@Override
	public void buildClassifier(Instances data) throws Exception {

		// Make sure K value is in range
		if (m_KValue > data.numAttributes() - 1)
			m_KValue = data.numAttributes() - 1;
		if (m_KValue < 1)
			m_KValue = (int) Utils.log2(data.numAttributes()) + 1;

		// can classifier handle the data?
		getCapabilities().testWithFail(data);

		// remove instances with missing class
		data = new Instances(data);
		data.deleteWithMissingClass();

		// only class? -> build ZeroR model
		if (data.numAttributes() == 1) {
			System.err.println(
					"Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!");
			m_zeroR = new weka.classifiers.rules.ZeroR();
			m_zeroR.buildClassifier(data);
			return;
		} else {
			m_zeroR = null;
		}

		// Figure out appropriate datasets
		Instances train = null;
		Instances backfit = null;
		Random rand = data.getRandomNumberGenerator(m_randomSeed);
		if (m_NumFolds <= 0) {
			train = data;
		} else {
			data.randomize(rand);
			data.stratify(m_NumFolds);
			train = data.trainCV(m_NumFolds, 1, rand);
			backfit = data.testCV(m_NumFolds, 1);
		}

		// Create the attribute indices window
		int[] attIndicesWindow = new int[data.numAttributes() - 1];
		int j = 0;
		for (int i = 0; i < attIndicesWindow.length; i++) {
			if (j == data.classIndex())
				j++; // do not include the class
			attIndicesWindow[i] = j++;
		}

		// Compute initial class counts
		double[] classProbs = new double[train.numClasses()];
		for (int i = 0; i < train.numInstances(); i++) {
			Instance inst = train.instance(i);
			classProbs[(int) inst.classValue()] += inst.weight();
		}

		// Build tree
		m_Tree = new Tree();
		m_Info = new Instances(data, 0);
		m_Tree.buildTree(train, classProbs, attIndicesWindow, rand, 0);

		// Backfit if required
		if (backfit != null) {
			m_Tree.backfitData(backfit);
		}
	}

	/**
	 * Computes class distribution of an instance using the tree.
	 * 
	 * @param instance
	 *            the instance to compute the distribution for
	 * @return the computed class probabilities
	 * @throws Exception
	 *             if computation fails
	 */
	@Override
	public double[] distributionForInstance(Instance instance) throws Exception {

		if (m_zeroR != null) {
			return m_zeroR.distributionForInstance(instance);
		} else {
			return m_Tree.distributionForInstance(instance);
		}
	}

	/**
	 * Outputs the decision tree.
	 * 
	 * @return a string representation of the classifier
	 */
	@Override
	public String toString() {

		// only ZeroR model?
		if (m_zeroR != null) {
			StringBuffer buf = new StringBuffer();
			buf.append(this.getClass().getName().replaceAll(".*\\.", "") + "\n");
			buf.append(this.getClass().getName().replaceAll(".*\\.", "").replaceAll(".", "=") + "\n\n");
			buf.append("Warning: No model could be built, hence ZeroR model is used:\n\n");
			buf.append(m_zeroR.toString());
			return buf.toString();
		}

		if (m_Tree == null) {
			return "RandomTree: no model has been built yet.";
		} else {
			return "\nRandomTree\n==========\n" + m_Tree.toString(0) + "\n" + "\nSize of the tree : "
					+ m_Tree.numNodes() + (getMaxDepth() > 0 ? ("\nMax depth of tree: " + getMaxDepth()) : (""));
		}
	}

	/**
	 * Returns graph describing the tree.
	 * 
	 * @return the graph describing the tree
	 * @throws Exception
	 *             if graph can't be computed
	 */
	@Override
	public String graph() throws Exception {

		if (m_Tree == null) {
			throw new Exception("RandomTree: No model built yet.");
		}
		StringBuffer resultBuff = new StringBuffer();
		m_Tree.toGraph(resultBuff, 0, null);
		String result = "digraph RandomTree {\n" + "edge [style=bold]\n" + resultBuff.toString() + "\n}\n";
		return result;
	}

	/**
	 * Returns the type of graph this classifier represents.
	 * 
	 * @return Drawable.TREE
	 */
	@Override
	public int graphType() {
		return Drawable.TREE;
	}

	/**
	 * Builds the classifier to generate a partition.
	 */
	@Override
	public void generatePartition(Instances data) throws Exception {

		buildClassifier(data);
	}

	/**
	 * Computes array that indicates node membership. Array locations are
	 * allocated based on breadth-first exploration of the tree.
	 */
	@Override
	public double[] getMembershipValues(Instance instance) throws Exception {

		if (m_zeroR != null) {
			double[] m = new double[1];
			m[0] = instance.weight();
			return m;
		} else {

			// Set up array for membership values
			double[] a = new double[numElements()];

			// Initialize queues
			Queue<Double> queueOfWeights = new LinkedList<Double>();
			Queue<Tree> queueOfNodes = new LinkedList<Tree>();
			queueOfWeights.add(instance.weight());
			queueOfNodes.add(m_Tree);
			int index = 0;

			// While the queue is not empty
			while (!queueOfNodes.isEmpty()) {

				a[index++] = queueOfWeights.poll();
				Tree node = queueOfNodes.poll();

				// Is node a leaf?
				if (node.m_Attribute <= -1) {
					continue;
				}

				// Compute weight distribution
				double[] weights = new double[node.m_Successors.length];
				if (instance.isMissing(node.m_Attribute)) {
					System.arraycopy(node.m_Prop, 0, weights, 0, node.m_Prop.length);
				} else if (m_Info.attribute(node.m_Attribute).isNominal()) {
					weights[(int) instance.value(node.m_Attribute)] = 1.0;
				} else {
					if (instance.value(node.m_Attribute) < node.m_SplitPoint) {
						weights[0] = 1.0;
					} else {
						weights[1] = 1.0;
					}
				}
				for (int i = 0; i < node.m_Successors.length; i++) {
					queueOfNodes.add(node.m_Successors[i]);
					queueOfWeights.add(a[index - 1] * weights[i]);
				}
			}
			return a;
		}
	}

	/**
	 * Returns the number of elements in the partition.
	 */
	@Override
	public int numElements() throws Exception {

		if (m_zeroR != null) {
			return 1;
		}
		return m_Tree.numNodes();
	}

	/**
	 * The inner class for dealing with the tree.
	 */
	protected class Tree implements Serializable {

		/** For serialization */
		private static final long serialVersionUID = 3549573538656522569L;

		/** The subtrees appended to this tree. */
		protected Tree[] m_Successors;

		/** The attribute to split on. */
		protected int m_Attribute = -1;

		/** The split point. */
		protected double m_SplitPoint = Double.NaN;

		/** The proportions of training instances going down each branch. */
		protected double[] m_Prop = null;

		/** Class probabilities from the training data. */
		protected double[] m_ClassDistribution = null;

		/**
		 * Backfits the given data into the tree.
		 */
		public void backfitData(Instances data) throws Exception {

			// Compute initial class counts
			double[] classProbs = new double[data.numClasses()];
			for (int i = 0; i < data.numInstances(); i++) {
				Instance inst = data.instance(i);
				classProbs[(int) inst.classValue()] += inst.weight();
			}

			// Fit data into tree
			backfitData(data, classProbs);
		}

		/**
		 * Computes class distribution of an instance using the decision tree.
		 * 
		 * @param instance
		 *            the instance to compute the distribution for
		 * @return the computed class distribution
		 * @throws Exception
		 *             if computation fails
		 */
		public double[] distributionForInstance(Instance instance) throws Exception {

			double[] returnedDist = null;

			if (m_Attribute > -1) {

				// Node is not a leaf
				if (instance.isMissing(m_Attribute)) {

					// Value is missing
					returnedDist = new double[m_Info.numClasses()];

					// Split instance up
					for (int i = 0; i < m_Successors.length; i++) {
						double[] help = m_Successors[i].distributionForInstance(instance);
						if (help != null) {
							for (int j = 0; j < help.length; j++) {
								returnedDist[j] += m_Prop[i] * help[j];
							}
						}
					}
				} else if (m_Info.attribute(m_Attribute).isNominal()) {

					// For nominal attributes
					returnedDist = m_Successors[(int) instance.value(m_Attribute)].distributionForInstance(instance);
				} else {

					// For numeric attributes
					if (instance.value(m_Attribute) < m_SplitPoint) {
						returnedDist = m_Successors[0].distributionForInstance(instance);
					} else {
						returnedDist = m_Successors[1].distributionForInstance(instance);
					}
				}
			}

			// Node is a leaf or successor is empty?
			if ((m_Attribute == -1) || (returnedDist == null)) {

				// Is node empty?
				if (m_ClassDistribution == null) {
					if (getAllowUnclassifiedInstances()) {
						return new double[m_Info.numClasses()];
					} else {
						return null;
					}
				}

				// Else return normalized distribution
				double[] normalizedDistribution = m_ClassDistribution.clone();
				Utils.normalize(normalizedDistribution);
				return normalizedDistribution;
			} else {
				return returnedDist;
			}
		}

		/**
		 * Outputs one node for graph.
		 * 
		 * @param text
		 *            the buffer to append the output to
		 * @param num
		 *            unique node id
		 * @return the next node id
		 * @throws Exception
		 *             if generation fails
		 */
		public int toGraph(StringBuffer text, int num) throws Exception {

			int maxIndex = Utils.maxIndex(m_ClassDistribution);
			String classValue = m_Info.classAttribute().value(maxIndex);

			num++;
			if (m_Attribute == -1) {
				text.append("N" + Integer.toHexString(hashCode()) + " [label=\"" + num + ": " + classValue + "\""
						+ "shape=box]\n");
			} else {
				text.append("N" + Integer.toHexString(hashCode()) + " [label=\"" + num + ": " + classValue + "\"]\n");
				for (int i = 0; i < m_Successors.length; i++) {
					text.append("N" + Integer.toHexString(hashCode()) + "->" + "N"
							+ Integer.toHexString(m_Successors[i].hashCode()) + " [label=\""
							+ m_Info.attribute(m_Attribute).name());
					if (m_Info.attribute(m_Attribute).isNumeric()) {
						if (i == 0) {
							text.append(" < " + Utils.doubleToString(m_SplitPoint, 2));
						} else {
							text.append(" >= " + Utils.doubleToString(m_SplitPoint, 2));
						}
					} else {
						text.append(" = " + m_Info.attribute(m_Attribute).value(i));
					}
					text.append("\"]\n");
					num = m_Successors[i].toGraph(text, num);
				}
			}

			return num;
		}

		/**
		 * Outputs a leaf.
		 * 
		 * @return the leaf as string
		 * @throws Exception
		 *             if generation fails
		 */
		protected String leafString() throws Exception {

			double sum = 0, maxCount = 0;
			int maxIndex = 0;
			if (m_ClassDistribution != null) {
				sum = Utils.sum(m_ClassDistribution);
				maxIndex = Utils.maxIndex(m_ClassDistribution);
				maxCount = m_ClassDistribution[maxIndex];
			}
			return " : " + m_Info.classAttribute().value(maxIndex) + " (" + Utils.doubleToString(sum, 2) + "/"
					+ Utils.doubleToString(sum - maxCount, 2) + ")";
		}

		/**
		 * Recursively outputs the tree.
		 * 
		 * @param level
		 *            the current level of the tree
		 * @return the generated subtree
		 */
		protected String toString(int level) {

			try {
				StringBuffer text = new StringBuffer();

				if (m_Attribute == -1) {

					// Output leaf info
					return leafString();
				} else if (m_Info.attribute(m_Attribute).isNominal()) {

					// For nominal attributes
					for (int i = 0; i < m_Successors.length; i++) {
						text.append("\n");
						for (int j = 0; j < level; j++) {
							text.append("|   ");
						}
						text.append(
								m_Info.attribute(m_Attribute).name() + " = " + m_Info.attribute(m_Attribute).value(i));
						text.append(m_Successors[i].toString(level + 1));
					}
				} else {

					// For numeric attributes
					text.append("\n");
					for (int j = 0; j < level; j++) {
						text.append("|   ");
					}
					text.append(m_Info.attribute(m_Attribute).name() + " < " + Utils.doubleToString(m_SplitPoint, 2));
					text.append(m_Successors[0].toString(level + 1));
					text.append("\n");
					for (int j = 0; j < level; j++) {
						text.append("|   ");
					}
					text.append(m_Info.attribute(m_Attribute).name() + " >= " + Utils.doubleToString(m_SplitPoint, 2));
					text.append(m_Successors[1].toString(level + 1));
				}

				return text.toString();
			} catch (Exception e) {
				e.printStackTrace();
				return "RandomTree: tree can't be printed";
			}
		}

		/**
		 * Recursively backfits data into the tree.
		 * 
		 * @param data
		 *            the data to work with
		 * @param classProbs
		 *            the class distribution
		 * @throws Exception
		 *             if generation fails
		 */
		protected void backfitData(Instances data, double[] classProbs) throws Exception {

			// Make leaf if there are no training instances
			if (data.numInstances() == 0) {
				m_Attribute = -1;
				m_ClassDistribution = null;
				m_Prop = null;
				return;
			}

			// Check if node doesn't contain enough instances or is pure
			// or maximum depth reached
			m_ClassDistribution = classProbs.clone();

			/*
			 * if (Utils.sum(m_ClassDistribution) < 2 * m_MinNum ||
			 * Utils.eq(m_ClassDistribution[Utils.maxIndex(m_ClassDistribution)]
			 * , Utils .sum(m_ClassDistribution))) {
			 * 
			 * // Make leaf m_Attribute = -1; m_Prop = null; return; }
			 */

			// Are we at an inner node
			if (m_Attribute > -1) {

				// Compute new weights for subsets based on backfit data
				m_Prop = new double[m_Successors.length];
				for (int i = 0; i < data.numInstances(); i++) {
					Instance inst = data.instance(i);
					if (!inst.isMissing(m_Attribute)) {
						if (data.attribute(m_Attribute).isNominal()) {
							m_Prop[(int) inst.value(m_Attribute)] += inst.weight();
						} else {
							m_Prop[(inst.value(m_Attribute) < m_SplitPoint) ? 0 : 1] += inst.weight();
						}
					}
				}

				// If we only have missing values we can make this node into a
				// leaf
				if (Utils.sum(m_Prop) <= 0) {
					m_Attribute = -1;
					m_Prop = null;
					return;
				}

				// Otherwise normalize the proportions
				Utils.normalize(m_Prop);

				// Split data
				Instances[] subsets = splitData(data);

				// Go through subsets
				for (int i = 0; i < subsets.length; i++) {

					// Compute distribution for current subset
					double[] dist = new double[data.numClasses()];
					for (int j = 0; j < subsets[i].numInstances(); j++) {
						dist[(int) subsets[i].instance(j).classValue()] += subsets[i].instance(j).weight();
					}

					// Backfit subset
					m_Successors[i].backfitData(subsets[i], dist);
				}

				// If unclassified instances are allowed, we don't need to store
				// the
				// class distribution
				if (getAllowUnclassifiedInstances()) {
					m_ClassDistribution = null;
					return;
				}

				// Otherwise, if all successors are non-empty, we don't need to
				// store
				// the class distribution
				boolean emptySuccessor = false;
				for (int i = 0; i < subsets.length; i++) {
					if (m_Successors[i].m_ClassDistribution == null) {
						emptySuccessor = true;
						return;
					}
				}
				m_ClassDistribution = null;

				// If we have a least two non-empty successors, we should keep
				// this tree
				/*
				 * int nonEmptySuccessors = 0; for (int i = 0; i <
				 * subsets.length; i++) { if
				 * (m_Successors[i].m_ClassDistribution != null) {
				 * nonEmptySuccessors++; if (nonEmptySuccessors > 1) { return; }
				 * } }
				 * 
				 * // Otherwise, this node is a leaf or should become a leaf
				 * m_Successors = null; m_Attribute = -1; m_Prop = null; return;
				 */
			}
		}

		/**
		 * Recursively generates a tree.
		 * 
		 * @param data
		 *            the data to work with
		 * @param classProbs
		 *            the class distribution
		 * @param attIndicesWindow
		 *            the attribute window to choose attributes from
		 * @param random
		 *            random number generator for choosing random attributes
		 * @param depth
		 *            the current depth
		 * @throws Exception
		 *             if generation fails
		 */
		protected void buildTree(Instances data, double[] classProbs, int[] attIndicesWindow, Random random, int depth)
				throws Exception {
			// Make leaf if there are no training instances
			if (data.numInstances() == 0) {
				m_Attribute = -1;
				m_ClassDistribution = null;
				m_Prop = null;
				return;
			}
			// Check if node doesn't contain enough instances or is pure or maximum depth reached
			m_ClassDistribution = classProbs.clone();
			if (Utils.sum(m_ClassDistribution) < 2 * m_MinNum || Utils.eq(m_ClassDistribution[Utils.maxIndex(m_ClassDistribution)], Utils.sum(m_ClassDistribution))|| ((getMaxDepth() > 0) && (depth >= getMaxDepth()))) {
				// Make leaf
				m_Attribute = -1;
				m_Prop = null;
				return;
			}
			// Compute class distributions and value of splitting criterion for each attribute
			double val = -Double.MAX_VALUE;
			double split = -Double.MAX_VALUE;
			double[][] bestDists = null;
			double[] bestProps = null;
			int bestIndex = 0;
			// Handles to get arrays out of distribution method
			double[][] props = new double[1][0];
			double[][][] dists = new double[1][0][0];
			// Investigate K random attributes
			int attIndex = 0;
			int windowSize = attIndicesWindow.length;
			int k = m_KValue;
			boolean gainFound = false;
			while ((windowSize > 0) && (k-- > 0 || !gainFound)) {
				int chosenIndex = random.nextInt(windowSize);
				attIndex = attIndicesWindow[chosenIndex];
				// shift chosen attIndex out of window
				attIndicesWindow[chosenIndex] = attIndicesWindow[windowSize - 1];
				attIndicesWindow[windowSize - 1] = attIndex;
				windowSize--;
				double currSplit = distribution(props, dists, attIndex, data);
				double currVal = gain(dists[0], priorVal(dists[0]));
				if (Utils.gr(currVal, 0))
					gainFound = true;
				if ((currVal > val) || ((currVal == val) && (attIndex < bestIndex))) {
					val = currVal;
					bestIndex = attIndex;
					split = currSplit;
					bestProps = props[0];
					bestDists = dists[0];
				}
			}
			// Find best attribute
			m_Attribute = bestIndex;
			// Any useful split found?
			if (Utils.gr(val, 0)) {
				// Build subtrees
				m_SplitPoint = split;
				m_Prop = bestProps;
				Instances[] subsets = splitData(data);//according to m_Prop
				m_Successors = new Tree[bestDists.length];
				for (int i = 0; i < bestDists.length; i++) {
					m_Successors[i] = new Tree();
					m_Successors[i].buildTree(subsets[i], bestDists[i], attIndicesWindow, random, depth + 1);
				}
				// If all successors are non-empty, we don't need to store the
				// class
				// distribution
				boolean emptySuccessor = false;
				for (int i = 0; i < subsets.length; i++) {
					if (m_Successors[i].m_ClassDistribution == null) {
						emptySuccessor = true;
						break;
					}
				}
				if (!emptySuccessor) {
					m_ClassDistribution = null;
				}
			} else {
				// Make leaf
				m_Attribute = -1;
			}
		}

		/**
		 * Computes size of the tree.
		 * 
		 * @return the number of nodes
		 */
		public int numNodes() {

			if (m_Attribute == -1) {
				return 1;
			} else {
				int size = 1;
				for (int i = 0; i < m_Successors.length; i++) {
					size += m_Successors[i].numNodes();
				}
				return size;
			}
		}

		/**
		 * Splits instances into subsets based on the given split.
		 * 
		 * @param data
		 *            the data to work with
		 * @return the subsets of instances
		 * @throws Exception
		 *             if something goes wrong
		 */
		protected Instances[] splitData(Instances data) throws Exception {

			// Allocate array of Instances objects
			Instances[] subsets = new Instances[m_Prop.length];
			for (int i = 0; i < m_Prop.length; i++) {
				subsets[i] = new Instances(data, data.numInstances());
			}

			// Go through the data
			for (int i = 0; i < data.numInstances(); i++) {

				// Get instance
				Instance inst = data.instance(i);

				// Does the instance have a missing value?
				if (inst.isMissing(m_Attribute)) {

					// Split instance up
					for (int k = 0; k < m_Prop.length; k++) {
						if (m_Prop[k] > 0) {
							Instance copy = (Instance) inst.copy();
							copy.setWeight(m_Prop[k] * inst.weight());
							subsets[k].add(copy);
						}
					}

					// Proceed to next instance
					continue;
				}

				// Do we have a nominal attribute?
				if (data.attribute(m_Attribute).isNominal()) {
					subsets[(int) inst.value(m_Attribute)].add(inst);

					// Proceed to next instance
					continue;
				}

				// Do we have a numeric attribute?
				if (data.attribute(m_Attribute).isNumeric()) {
					subsets[(inst.value(m_Attribute) < m_SplitPoint) ? 0 : 1].add(inst);
					// Proceed to next instance
					continue;
				}

				// Else throw an exception
				throw new IllegalArgumentException("Unknown attribute type");
			}

			// Save memory
			for (int i = 0; i < m_Prop.length; i++) {
				subsets[i].compactify();
			}

			// Return the subsets
			return subsets;
		}

		/**
		 * Computes class distribution for an attribute.
		 * 
		 * @param props
		 * @param dists
		 * @param att
		 *            the attribute index
		 * @param data
		 *            the data to work with
		 * @throws Exception
		 *             if something goes wrong
		 */
		protected double distribution(double[][] props, double[][][] dists, int att, Instances data) throws Exception {

			double splitPoint = Double.NaN;
			Attribute attribute = data.attribute(att);
			double[][] dist = null;
			int indexOfFirstMissingValue = data.numInstances();
			if (attribute.isNominal()) {
				// For nominal attributes
				dist = new double[attribute.numValues()][data.numClasses()];
				for (int i = 0; i < data.numInstances(); i++) {
					Instance inst = data.instance(i);
					if (inst.isMissing(att)) {
						// Skip missing values at this stage
						if (indexOfFirstMissingValue == data.numInstances()) {
							indexOfFirstMissingValue = i;
						}
						continue;
					}//P(A|C)
					dist[(int) inst.value(att)][(int) inst.classValue()] += inst.weight();
				}
			} else {
				// For numeric attributes
				double[][] currDist = new double[2][data.numClasses()];
				dist = new double[2][data.numClasses()];
				// Sort data
				data.sort(att);
				for (int j = 0; j < data.numInstances(); j++) {	// Move all instances into second subset
					Instance inst = data.instance(j);
					if (inst.isMissing(att)) {
						// Can stop as soon as we hit a missing value
						indexOfFirstMissingValue = j;
						break;
					}
					currDist[1][(int) inst.classValue()] += inst.weight();
				}
				double priorVal = priorVal(currDist);// Value before splitting
				for (int j = 0; j < currDist.length; j++) {
					System.arraycopy(currDist[j], 0, dist[j], 0, dist[j].length);	// Save initial distribution
				}
				// Try all possible split points
				double currSplit = data.instance(0).value(att);
				double currVal, bestVal = -Double.MAX_VALUE;
				for (int i = 0; i < indexOfFirstMissingValue; i++) {
					Instance inst = data.instance(i);
					// Can we place a sensible split point here?
					if (inst.value(att) > currSplit) {
						// Compute gain for split point
						currVal = gain(currDist, priorVal);
						// Is the current split point the best point so far?
						if (currVal > bestVal) {
							// Store value of current point
							bestVal = currVal;
							// Save split point
							splitPoint = (inst.value(att) + currSplit) / 2.0;
							// Check for numeric precision problems
							if (splitPoint <= currSplit) {
								splitPoint = inst.value(att);
							}
							// Save distribution
							for (int j = 0; j < currDist.length; j++) {
								System.arraycopy(currDist[j], 0, dist[j], 0, dist[j].length);
							}
						}
						currSplit = inst.value(att);
					}
					currDist[0][(int) inst.classValue()] += inst.weight();// Shift over the weight
					currDist[1][(int) inst.classValue()] -= inst.weight();
				}
			}
			// Compute weights for subsets
			props[0] = new double[dist.length];
			for (int k = 0; k < props[0].length; k++) {
				props[0][k] = Utils.sum(dist[k]);
			}
			if (Utils.eq(Utils.sum(props[0]), 0)) {
				for (int k = 0; k < props[0].length; k++) {
					props[0][k] = 1.0 / props[0].length;
				}
			} else {
				Utils.normalize(props[0]);
			}
			// Distribute weights for instances with missing values
			for (int i = indexOfFirstMissingValue; i < data.numInstances(); i++) {
				Instance inst = data.instance(i);
				if (attribute.isNominal()) {
					// Need to check if attribute value is missing
					if (inst.isMissing(att)) {
						for (int j = 0; j < dist.length; j++) {
							dist[j][(int) inst.classValue()] += props[0][j] * inst.weight();
						}
					}
				} else {
					// Can be sure that value is missing, so no test required
					for (int j = 0; j < dist.length; j++) {
						dist[j][(int) inst.classValue()] += props[0][j] * inst.weight();
					}
				}
			}
			// Return distribution and split point
			dists[0] = dist;
			return splitPoint;
		}

		/**
		 * Computes value of splitting criterion before split.
		 * 
		 * @param dist
		 *            the distributions
		 * @return the splitting criterion
		 */
		protected double priorVal(double[][] dist) {

			return ContingencyTables.entropyOverColumns(dist);
		}

		/**
		 * Computes value of splitting criterion after split.
		 * 
		 * @param dist
		 *            the distributions
		 * @param priorVal
		 *            the splitting criterion
		 * @return the gain after the split
		 */
		protected double gain(double[][] dist, double priorVal) {

			return priorVal - ContingencyTables.entropyConditionedOnRows(dist);
		}

		/**
		 * Returns the revision string.
		 * 
		 * @return the revision
		 */
		public String getRevision() {
			return RevisionUtils.extract("$Revision: 9526 $");
		}

		/**
		 * Outputs one node for graph.
		 * 
		 * @param text
		 *            the buffer to append the output to
		 * @param num
		 *            the current node id
		 * @param parent
		 *            the parent of the nodes
		 * @return the next node id
		 * @throws Exception
		 *             if something goes wrong
		 */
		protected int toGraph(StringBuffer text, int num, Tree parent) throws Exception {

			num++;
			if (m_Attribute == -1) {
				text.append("N" + Integer.toHexString(Tree.this.hashCode()) + " [label=\"" + num + leafString() + "\""
						+ " shape=box]\n");

			} else {
				text.append("N" + Integer.toHexString(Tree.this.hashCode()) + " [label=\"" + num + ": "
						+ m_Info.attribute(m_Attribute).name() + "\"]\n");
				for (int i = 0; i < m_Successors.length; i++) {
					text.append("N" + Integer.toHexString(Tree.this.hashCode()) + "->" + "N"
							+ Integer.toHexString(m_Successors[i].hashCode()) + " [label=\"");
					if (m_Info.attribute(m_Attribute).isNumeric()) {
						if (i == 0) {
							text.append(" < " + Utils.doubleToString(m_SplitPoint, 2));
						} else {
							text.append(" >= " + Utils.doubleToString(m_SplitPoint, 2));
						}
					} else {
						text.append(" = " + m_Info.attribute(m_Attribute).value(i));
					}
					text.append("\"]\n");
					num = m_Successors[i].toGraph(text, num, this);
				}
			}

			return num;
		}
	}

	/**
	 * Main method for this class.
	 * 
	 * @param argv
	 *            the commandline parameters
	 * @throws Exception 
	 */
	public static void main(String[] argv) throws Exception {
//		runClassifier(new RandomTree(), argv);
		Instances insts = Util.readWeatherNumeric();
		RandomTree rt = new RandomTree();
		rt.buildClassifier(insts);
		rt.classifyInstance(insts.instance(0));
	}
}
